## R Script Output -------------------------------------------------------------
# Appendix Figure E.1: Effect Decomposition: Post-shock Election-year Local Economic Condition as a Mediator (left)
# Appendix Table E.1: Effect Decomposition: Sequential g-Estimation (2020 GDP Growth)


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "RColorBrewer", 
         "gridExtra",
         "tools", 
         "lfe", 
         "stargazer",
         "multcomp",
         "snow",
         "xtable")

lapply(pkg, require, character.only = TRUE)

# set main directory
MAIN_DIR <- "~/Dropbox/Research/ISQ-frei-replication/"


## load data -------------------------------------------------------------------
load(file = paste(MAIN_DIR, "data-merge-county.RData", sep = ""))

# set data frames
df <- county.df

df.treat.1 <- df %>% 
  filter(year == 2012 | year == 2016)

df.treat.2 <- df %>% 
  filter(year == 2012 | year == 2020)

df.placebo <- df %>% 
  filter(year == 2008 | year == 2012)


## set formula -----------------------------------------------------------------
# outcome
dv <- "dem_share"
dv.de <- "de_dem_share"

# treatment
treat.con <- "anticorrupt + anticorrupt:cn_ug_sqmi_cat_2012_bi_high"

# mediator
mediator <- "anticorrupt:gdp_growth_2020_center + anticorrupt:cn_ug_sqmi_cat_2012_bi_high:gdp_growth_2020_center"

# controls
controls <- c("anticorrupt:cn_g_sqmi_cat_2012_bi",
              "anticorrupt:ind_ug_sqmi_cat_2012_bi",
              
              "share_pop_county_female",
              "share_pop_5_17_county",
              "share_pop_18_24_county",
              "share_pop_25_34_county",
              "share_pop_35_44_county",
              "share_pop_45_54_county",
              "share_pop_55_64_county",
              "share_pop_65_county",
              
              "share_pop_white_county",
              "share_pop_black_county",
              "share_pop_hispanic_county",
              "share_pop_asian_county",
              "share_pop_cn_county",
              
              "share_foreign_born_county",
              "share_edu_ba_above_county",
              "share_enroll_county",
              "ln_pop_density_county",
              "effective_tax_county",
              "ipw_county",
              "employ_rate_county",
              "median_household_income_county",
              "vacancy_county",
              
              "share_pop_county_female:anticorrupt",
              "share_pop_5_17_county:anticorrupt",
              "share_pop_18_24_county:anticorrupt",
              "share_pop_25_34_county:anticorrupt",
              "share_pop_35_44_county:anticorrupt",
              "share_pop_45_54_county:anticorrupt",
              "share_pop_55_64_county:anticorrupt",
              "share_pop_65_county:anticorrupt",
              
              "share_pop_white_county:anticorrupt",
              "share_pop_black_county:anticorrupt",
              "share_pop_hispanic_county:anticorrupt",
              "share_pop_asian_county:anticorrupt",
              "share_pop_cn_county:anticorrupt",
              
              "share_foreign_born_county:anticorrupt",
              "share_edu_ba_above_county:anticorrupt",
              "share_enroll_county:anticorrupt",
              "ln_pop_density_county:anticorrupt",
              "effective_tax_county:anticorrupt",
              "ipw_county:anticorrupt",
              "employ_rate_county:anticorrupt",
              "median_household_income_county:anticorrupt",
              "vacancy_county:anticorrupt")

# fixed effects
fe <- "| county_fips | 0 | county_fips"

# combine
# first stage
fmla.att <- as.formula(paste(dv, " ~ ",
                             treat.con, 
                             " + ", 
                             paste(controls, collapse = " + "),
                             fe, sep = " "))
fmla.att

fmla.first <- as.formula(paste(dv, " ~ ",
                               mediator, " + ",
                               treat.con, 
                               " + ", 
                               paste(controls, collapse = " + "),
                               fe, sep = " "))
fmla.first

fmla.second <- as.formula(paste(dv.de, " ~ ",
                                treat.con, 
                                " + ", 
                                paste(controls, collapse = " + "),
                                fe, sep = " "))
fmla.second


## sequential g-estimation -----------------------------------------------------
# Simulation takes about 11 minutes on computer with 3.6 GHz 10-Core Intel Core i9 
# and 64 GB 2667 MHz DDR4
# set first.time to FALSE to save time by loading saved simulation output
first.time <- FALSE
#first.time <- TRUE

if (first.time == FALSE) {
  
  # load simulated data
  load(file = paste(MAIN_DIR, "sim-seq-g-2020-gdp-growth.RData", sep = ""))
  
} else {
  
  # set gdp growth levels (2 s.d. below mean to above)
  gdp.growth.vec <- seq(mean(df.treat.2$gdp_growth_2020, na.rm = TRUE) - 2 * sd(df.treat.2$gdp_growth_2020, na.rm = TRUE),
                        mean(df.treat.2$gdp_growth_2020, na.rm = TRUE) + 2 * sd(df.treat.2$gdp_growth_2020, na.rm = TRUE),
                        length = 5)
  gdp.growth.vec
  
  # set number of bootstraps
  #boots <- 10
  boots <- 1000
  
  # bootstrap ACDE when local gdp growth rate is set to mean
  x <- 3
  
  # recenter vars for seq-g estimiation
  df.treat.2 <- df.treat.2 %>%
    mutate(gdp_growth_2020_center = gdp_growth_2020 - gdp.growth.vec[x])
  
  # block-bootstrap the coefs and SEs
  # All uncertainty estimates are obtained based on block bootstraps at the county level for 1,000 times.
  #library(snow)
  clusters <- unique(df.treat.2$county_fips)
  
  att.hold <- rep(NA, times = boots)
  acde.hold <- rep(NA, times = boots)
  cl <- makeCluster(4)
  
  # open time stamp
  t1 <- Sys.time()
  
  # set the seed 
  set.seed(12345)
  
  for (i in 1:boots) {
    
    # cluster bootstrap
    # sample the clusters with replacement
    units <- sample(clusters, size = length(clusters), replace = TRUE)
    
    # now use multiple cores instead of 1
    clusterExport(cl, c("df.treat.2", "units"))
    
    # cluster apply instead of regular apply
    df.bs = clusterApply(cl, units, function(x) which(df.treat.2$county_fips == x))
    
    df.star <- df.treat.2[unlist(df.bs),]
    
    # ATT of CN FREI exposure
    did.att <- felm(fmla.att, 
                    keepX = TRUE,
                    data = df.star)
    
    att.hold[i] <- coef(did.att)["anticorrupt:cn_ug_sqmi_cat_2012_bi_high"]
    
    # first stage (the effect of mediator on outcome)
    boot.first <- felm(fmla.first, 
                       keepX = TRUE,
                       data = df.star)
    summary(boot.first)
    
    # demediate outcome
    df.de <- df.star %>%
      mutate(coef_frei_gdp_growth = coef(boot.first)["anticorrupt:gdp_growth_2020_center:cn_ug_sqmi_cat_2012_bi_high"],
             coef_anticorrupt_gdp_growth = coef(boot.first)["anticorrupt:gdp_growth_2020_center"],
             de_dem_share = dem_share -
               coef_frei_gdp_growth * gdp_growth_2020_center * anticorrupt * cn_ug_sqmi_cat_2012_bi_high -
               coef_anticorrupt_gdp_growth * gdp_growth_2020_center * anticorrupt)
    
    # second stage (CDE of treatment net mediator)
    boot.direct <- felm(fmla.second, 
                        keepX = TRUE,
                        data = df.de)
    
    acde.hold[i] <- coef(boot.direct)["anticorrupt:cn_ug_sqmi_cat_2012_bi_high"]
  }
  
  # close time stamp
  t2 <- Sys.time()
  t2 - t1
  
  stopCluster(cl)
  
  # save
  sim.seq.g.3 <- tibble(gdp_growth = gdp.growth.vec[x],
                        ATT = att.hold,
                        ACDE = acde.hold,
                        AIDE = att.hold - acde.hold)
  sim.seq.g.3
  
  # reshape
  sim.seq.g.gdp.growth.l <- sim.seq.g.3 %>%
    gather(estimator, value, -gdp_growth) %>%
    mutate(estimator = factor(estimator, levels = c("ATT", "ACDE", "AIDE")))
  
  # save data sets
  save(sim.seq.g.gdp.growth.l, 
       file = paste(MAIN_DIR, "sim-seq-g-2020-gdp-growth.RData", sep = ""))
}

# summarize results
sim.seq.g.gdp.growth.sum <- sim.seq.g.gdp.growth.l %>%
  group_by(estimator) %>%
  summarize(mean = mean(value, na.rm = TRUE),
            lwr = quantile(value, 0.025),
            upr = quantile(value, 0.975))


## Figure E.1 (left) -----------------------------------------------------------
# set parameters
axis.title.size <- 14

# plot
p.boot.coef <- ggplot(data = sim.seq.g.gdp.growth.sum,
                      aes(x = estimator, 
                          y = mean,
                          ymin = lwr, 
                          ymax = upr)) +
  geom_pointrange(size = 0.7) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") + 
  scale_y_continuous("\nEstimated DiD Effect on Dem. Vote Shares",
                     limits = c(-0.0293639, 
                                0.006)) +
  scale_x_discrete("Quantity of Interest") +
  ggtitle("2020 GDP Growth") + 
  annotate("text", 
           x = 1, 
           y = sim.seq.g.gdp.growth.sum %>% 
             filter(estimator == "ATT") %>% 
             pull(lwr) - 0.0025, 
           label = "Total\nEffect", 
           size = 5) +
  annotate("text", 
           x = 2, 
           y = sim.seq.g.gdp.growth.sum %>% 
             filter(estimator == "ACDE") %>% 
             pull(lwr) - 0.0025, 
           label = "Controlled\nDirect Effect", 
           size = 5) +
  annotate("text", 
           x = 3, 
           y = sim.seq.g.gdp.growth.sum %>% 
             filter(estimator == "AIDE") %>% 
             pull(upr) + 0.003, 
           label = "Indirect Effect\n(via GDP Growth)", 
           size = 5) +
  theme_bw() +  
  theme(legend.position = "none",
        axis.title.y = element_text(size = axis.title.size + 3,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size - 1),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.title = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0),
                                  face = "bold",
                                  hjust = 0.5),
        panel.grid = element_blank()) 

# print
pdf(file = paste(MAIN_DIR, "Figure-E1-left.pdf", sep = "/"), 
    width = 5.7, height = 5.7)
print(p.boot.coef)
dev.off()


## Table E.1: Effect Decomposition: Sequential g-Estimation (2020 GDP Growth) --
sim.seq.g.gdp.growth.sum.out <- sim.seq.g.gdp.growth.sum %>%
  mutate(ci = paste("[", 
                    round(lwr, 5), 
                    ", ",
                    round(upr, 5), 
                    "]",
                    sep = "")) %>%
  dplyr::select(estimator, mean, ci) %>%
  mutate(estimator = str_replace_all(estimator, "ATT", "Total Effect"),
         estimator = str_replace_all(estimator, "ACDE", "Controlled Direct Effect"),
         estimator = str_replace_all(estimator, "AIDE", "Indirect Effect via 2020 Local GDP Growth")) %>%
  rename(`Quantity of Interest` = estimator,
         `Mean Estimate` = mean,
         `95% Block-Bootstrapped C.I.` = ci)

sim.ci.out.xtable <- xtable(sim.seq.g.gdp.growth.sum.out, 
                            digits = 4, 
                            caption = "Effect Decomposition: Sequential g-Estimation",
                            label = "tb:seq-g-gdp-growth-2020")

# save
print(sim.ci.out.xtable, 
      type = "html",
      file.path(MAIN_DIR, "Table-E1-2020-gdp.html"),
      include.rownames = FALSE)
# print(sim.ci.out.xtable, 
#       type = "latex",
#       file.path(TABLE_DIR, "Table-E1-2020-gdp.tex"),
#       include.rownames = FALSE)

